import random
import pickle
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import os
import sys
# Builds 100 seeded sub-samples ("batches") of the per-account entry lists.
#
# For every batch the RNG is seeded with the batch number, then each
# congress / mayor / governor / soccer account that has at least
# SAMPLE_SIZE entries and passes its group's filter gets a random sample
# of SAMPLE_SIZE entries. The four resulting dicts are pickled into four
# output folders (all_four, congress_mayor, congress_governor,
# congress_soccer), each under a sub-folder named after the batch number.

SAMPLE_SIZE = 100


def _load_pickle(path):
    """Return the object pickled at *path*, closing the file afterwards."""
    # NOTE(review): pickle can execute arbitrary code while loading; these
    # files are assumed to be trusted, locally produced data.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


def _dump_to_folders(obj, folders, file_name):
    """Pickle *obj* as *file_name* inside every folder of *folders*."""
    for folder in folders:
        # The context manager guarantees the data is flushed and the handle
        # released before the next file is opened.
        with open(folder + file_name, 'wb') as handle:
            pickle.dump(obj, handle)


# Kept as a module-level name; the output paths below hard-code the same
# topic number ("new_topic_number_26").
topic_number = 26

# The inputs do not depend on the batch, so they are loaded once instead of
# once per iteration. Loading consumes no random numbers and the loop never
# mutates these dicts, so the per-batch samples are unaffected.
congress_tweet_dict = _load_pickle('./congress_tweet_dict_4th_no_timelimit')
dw_dict = _load_pickle('./115th_congress_dw_nominate')
name_state_handle_dict = _load_pickle('./115th_congress_name_state_handle_dict')
mayor_tweet_dict = _load_pickle('./mayor_tweet_dict_4th_no_timelimit')
mayor_party_affiliation_dict = _load_pickle('./mayor_party_affiliation_dict')
governor_tweet_dict = _load_pickle('./governor_tweet_dict_4th_no_timelimit')
governor_party_affiliation_dict = _load_pickle('./governor_party_affiliation_dict')
soccer_tweet_dict = _load_pickle('./soccer_tweet_dict_4th_no_timelimit')

for int_batch_number in range(100):
    batch_number = str(int_batch_number)
    # One seed per batch makes every batch reproducible on its own.
    random.seed(int_batch_number)

    # Create tmp folders
    tmp_folder = '../data/tmp_files/new_topic_number_26/boosting_files/all_four/' + batch_number + '/'
    tmp_folder_1 = '../data/tmp_files/new_topic_number_26/boosting_files/congress_mayor/' + batch_number + '/'
    tmp_folder_2 = '../data/tmp_files/new_topic_number_26/boosting_files/congress_governor/' + batch_number + '/'
    tmp_folder_3 = '../data/tmp_files/new_topic_number_26/boosting_files/congress_soccer/' + batch_number + '/'
    output_folders = (tmp_folder, tmp_folder_1, tmp_folder_2, tmp_folder_3)
    for folder in output_folders:
        os.makedirs(folder, exist_ok=True)

    # NOTE: the sampling order (congress, mayor, governor, soccer, each in
    # dict iteration order) determines which random numbers every account
    # receives; do not reorder these sections.

    # Congress: keep members whose lower-cased name maps to a key that is
    # present in dw_dict.
    congress_tweet_dict_filter = {}
    for congress_name, twitter_list in congress_tweet_dict.items():
        if len(twitter_list) < SAMPLE_SIZE:
            continue
        name = congress_name.lower()
        if name not in name_state_handle_dict:
            continue
        last_name_state = name_state_handle_dict[name]
        if last_name_state in dw_dict:
            congress_tweet_dict_filter[congress_name] = random.sample(twitter_list, SAMPLE_SIZE)
    _dump_to_folders(
        congress_tweet_dict_filter,
        output_folders,
        'congress_tweet_dict_filter_4th_sample_no_timelimit_' + batch_number,
    )

    # Mayors: skip the 'na' placeholder and keep only 'rep' / 'dem'.
    # A mayor missing from the affiliation dict raises KeyError, as before.
    mayor_tweet_dict_filter = {}
    for mayor_name, twitter_list in mayor_tweet_dict.items():
        if mayor_name == 'na' or len(twitter_list) < SAMPLE_SIZE:
            continue
        if mayor_party_affiliation_dict[mayor_name] in ('rep', 'dem'):
            mayor_tweet_dict_filter[mayor_name] = random.sample(twitter_list, SAMPLE_SIZE)
    _dump_to_folders(
        mayor_tweet_dict_filter,
        output_folders,
        'mayor_tweet_dict_filter_4th_sample_no_timelimit_' + batch_number,
    )

    # Governors: same as mayors, but the party codes are 'r' / 'd'.
    governor_tweet_dict_filter = {}
    for governor_name, twitter_list in governor_tweet_dict.items():
        if governor_name == 'na' or len(twitter_list) < SAMPLE_SIZE:
            continue
        if governor_party_affiliation_dict[governor_name] in ('r', 'd'):
            governor_tweet_dict_filter[governor_name] = random.sample(twitter_list, SAMPLE_SIZE)
    _dump_to_folders(
        governor_tweet_dict_filter,
        output_folders,
        'governor_tweet_dict_filter_4th_sample_no_timelimit_' + batch_number,
    )

    # Soccer accounts: no affiliation filter, only the 'na' and size checks.
    soccer_tweet_dict_filter = {}
    for soccer_name, twitter_list in soccer_tweet_dict.items():
        if soccer_name == 'na' or len(twitter_list) < SAMPLE_SIZE:
            continue
        soccer_tweet_dict_filter[soccer_name] = random.sample(twitter_list, SAMPLE_SIZE)
    _dump_to_folders(
        soccer_tweet_dict_filter,
        output_folders,
        'soccer_tweet_dict_filter_4th_sample_no_timelimit_' + batch_number,
    )
